1 Aggregated and atomic scores per method

#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
#> 
#> Attaching package: 'plotly'
#> The following object is masked from 'package:ggplot2':
#> 
#>     last_plot
#> The following object is masked from 'package:stats':
#> 
#>     filter
#> The following object is masked from 'package:graphics':
#> 
#>     layout


# datasets = read_yaml("datasets.yml") 
# print(score_file)

# datasets = read_yaml("datasets.yml") 
# datasets = read_yaml(file_dataset) 


# Locate the late-integration score files (HDF5).
# The result keeps its original shape: a list whose first element is the
# character vector of file paths, so `score_files[[1]]` still works downstream.
# `list.files(pattern = )` expects a regular expression, not a shell glob, so
# the file-name filter is spelled out as a regex and applied in both branches.
score_file_pattern <- "score-li-.*\\.h5$"
if (basename(getwd()) == "hadaca3_framework") {
  # Snakemake script: the current working dir is hadaca3_framework
  score_files <- list(list.files(
    path = "./output/scores/",
    pattern = score_file_pattern,
    full.names = TRUE
  ))
} else {
  # Nextflow script: the score files are looked up in the working directory
  score_files <- list(list.files(pattern = score_file_pattern))
}


# Empty, typed template for the late-integration results table:
# 15 character columns describing the pipeline configuration followed by
# 25 numeric score columns. It has zero rows; rows are appended afterwards.
results_li <- local({
  config_cols <- c(
    "dataset", "ref",
    "preprocessing_mixRNA", "feature_selection_mixRNA",
    "preprocessing_RNA", "feature_selection_RNA",
    "preprocessing_scRNA", "feature_selection_scRNA", "deconvolution_rna",
    "preprocessing_mixMET", "feature_selection_mixMET",
    "preprocessing_MET", "feature_selection_MET", "deconvolution_met",
    "late_integration"
  )
  score_cols <- c(
    "aid", "aid_norm",
    "aitchison", "aitchison_norm",
    "jsd", "jsd_norm",
    "mae", "mae_norm",
    "pearson_col", "pearson_col_norm",
    "pearson_row", "pearson_row_norm",
    "pearson_tot", "pearson_tot_norm",
    "rmse", "rmse_norm",
    "score_aggreg",
    "sdid", "sdid_norm",
    "spearman_col", "spearman_col_norm",
    "spearman_row", "spearman_row_norm",
    "spearman_tot", "spearman_tot_norm"
  )
  # One zero-length vector per column, named after the column it fills
  empty_cols <- c(
    lapply(setNames(nm = config_cols), function(col) character()),
    lapply(setNames(nm = score_cols), function(col) numeric())
  )
  data.frame(empty_cols)
})


# Read every late-integration score file and stack one row per file into
# results_li. The pipeline configuration is encoded in the file name:
#   score-li-<dataset>_<ref>
#     _mixRNA_<pp>_<fs>_RNA_<pp>_<fs>_scRNA_<pp>_<fs>_<de>
#     _mixMET_<pp>_<fs>_MET_<pp>_<fs>_<de>_<li>.h5
# The dot before "h5" is escaped so that it only matches a literal dot.
score_name_regex <- paste0(
  "score-li-(.+)_(.+)_mixRNA_(.+)_(.+)_RNA_(.+)_(.+)_scRNA_(.+)_(.+)_(.+)",
  "_mixMET_(.+)_(.+)_MET_(.+)_(.+)_(.+)_(.+)\\.h5"
)

# Column names for the 15 capture groups, in the order they appear in the name
li_config_cols <- c(
  "dataset", "ref",
  "preprocessing_mixRNA", "feature_selection_mixRNA",
  "preprocessing_RNA", "feature_selection_RNA",
  "preprocessing_scRNA", "feature_selection_scRNA", "deconvolution_rna",
  "preprocessing_mixMET", "feature_selection_mixMET",
  "preprocessing_MET", "feature_selection_MET", "deconvolution_met",
  "late_integration"
)

# Build all rows first and bind them once; calling rbind() inside the loop
# re-copies the whole table at every iteration.
score_rows <- lapply(score_files[[1]], function(score_file) {
  # Row 1 of the match matrix, minus the full-match column = capture groups
  components <- str_match(basename(score_file), score_name_regex)[1, -1]
  if (anyNA(components)) {
    # A file whose name does not follow the convention would otherwise add a
    # row with all-NA configuration columns; skip it and say so.
    warning(
      "Skipping score file with unexpected name: ", score_file,
      call. = FALSE
    )
    return(NULL)
  }
  config <- as.data.frame(
    as.list(setNames(components, li_config_cols)),
    stringsAsFactors = FALSE
  )
  cbind(config, read_hdf5(score_file))
})
score_rows <- Filter(Negate(is.null), score_rows)

# Starting from results_li keeps the empty typed table when no file is found
results_li <- do.call(rbind, c(list(results_li), score_rows))
rownames(results_li) <- NULL

# Median aggregated score of each late-integration method, best method first
arrange(
  summarise(
    group_by(results_li, late_integration),
    GlobalScore = median(score_aggreg)
  ),
  desc(GlobalScore)
)
#> # A tibble: 3 × 2
#>   late_integration GlobalScore
#>   <chr>                  <dbl>
#> 1 OnlyMet                0.663
#> 2 limeanRMSE             0.660
#> 3 OnlyRna                0.646





# Turn the data identifiers into factors. unique() keeps the order of first
# appearance in results_li, so that is the order of the levels.
all_data_used <- c("dataset", "ref")
for (data_used in all_data_used) {
  results_li[[data_used]] <- factor(
    results_li[[data_used]],
    levels = unique(results_li[[data_used]])
  )
}

# Turn every pipeline-step column into a factor whose levels are sorted by
# decreasing aggregated score on the in vitro dataset.
all_functions_li <- c(
  "preprocessing_mixRNA", "feature_selection_mixRNA",
  "preprocessing_RNA", "feature_selection_RNA",
  "preprocessing_scRNA", "feature_selection_scRNA", "deconvolution_rna",
  "preprocessing_mixMET", "feature_selection_mixMET",
  "preprocessing_MET", "feature_selection_MET", "deconvolution_met",
  "late_integration"
)

# Row numbers (in the full table) of the invitro1 results, best score first.
# Ordering the subset and mapping back through `invitro_rows` keeps the indices
# aligned with results_li; indexing the full column with subset positions
# would pick unrelated rows.
invitro_rows <- which(results_li$dataset == "invitro1")
invitro_ranked <- invitro_rows[
  order(results_li$score_aggreg[invitro_rows], decreasing = TRUE)
]

for (fun in all_functions_li) {
  fun_values <- as.character(results_li[[fun]])
  # Values seen on invitro1 come first, in ranking order; the remaining values
  # are appended so that none of them is turned into NA by factor().
  results_li[[fun]] <- factor(
    fun_values,
    levels = unique(c(fun_values[invitro_ranked], fun_values))
  )
}



# Position of the aggregated score column in results_li
index_aggreg <- which(colnames(results_li) == "score_aggreg")

# Interactive table restricted to the pipeline-step columns and the aggregated
# score. The step columns are shifted by 2 because they come after the first
# two columns of results_li.
datatable(
  results_li[c(seq_along(all_functions_li) + 2L, index_aggreg)],
  extensions = "Buttons",
  options = list(
    pageLength = 10,
    # "B" places the Buttons extension in the table layout
    dom = "Bfrtip",
    buttons = list(
      list(
        extend = "colvis",
        text = "Show/Hide Columns",
        # Every column except the first one can be toggled
        columns = ":not(:first-child)"
      )
    )
  )
)

2 Early integration table

#> # A tibble: 0 × 2
#> # ℹ 2 variables: early_integration <chr>, GlobalScore <dbl>

3 Visualisations of the top 5 methods

#> `summarise()` has grouped output by 'preprocessing_mixRNA',
#> 'feature_selection_mixRNA', 'preprocessing_RNA', 'feature_selection_RNA',
#> 'preprocessing_scRNA', 'feature_selection_scRNA', 'deconvolution_rna',
#> 'preprocessing_mixMET', 'feature_selection_mixMET', 'preprocessing_MET',
#> 'feature_selection_MET', 'deconvolution_met', 'late_integration'. You can
#> override using the `.groups` argument.

4 Visualisations of the different metrics

4.1 Aggregated scores

4.1.1 PP

4.1.2 FS

4.1.3 DE

4.1.4 LI

4.2 MAE

4.2.1 PP

4.2.2 FS

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.2.3 DE

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.2.4 LI

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.3 RMSE

4.3.1 PP

4.3.2 FS

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.3.3 DE

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.3.4 LI

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.4 Spearman correlation (row)

4.4.1 PP

4.4.2 FS

4.4.3 DE

4.4.4 LI

4.5 Aitchison distance

4.5.1 PP

4.5.2 FS

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.5.3 DE

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.5.4 LI

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).